{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 123,
   "metadata": {},
   "outputs": [],
   "source": [
    "# pip install python-docx\n",
    "from docx import Document\n",
    "import pandas as pd\n",
    "import os"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 提取单个文件内容的函数"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 124,
   "metadata": {},
   "outputs": [],
   "source": [
    "def parse_docfile(doc_file):\n",
    "    doc = Document(doc_file)\n",
    "    table = doc.tables[0]\n",
    "    \n",
    "    return dict(\n",
    "        姓名 = table.cell(0, 1).text, 性别 = table.cell(0, 3).text,\n",
    "        民族 = table.cell(0, 5).text, 出生年月 = table.cell(0, 7).text,\n",
    "\n",
    "        参加工作时间 = table.cell(1, 1).text, 学历 = table.cell(1, 3).text,\n",
    "        籍贯 = table.cell(1, 5).text, 政治面貌 = table.cell(1, 7).text,\n",
    "\n",
    "        毕业院校 = table.cell(2, 1).text, 专业 = table.cell(2, 4).text,\n",
    "        职务 = table.cell(2, 7).text,\n",
    "\n",
    "        工作单位 = table.cell(3, 1).text, 报考类别 = table.cell(3, 7).text,\n",
    "\n",
    "        工作简历 = table.cell(4, 1).text.strip(),\n",
    "        工作业绩 = table.cell(5, 1).text.strip(),\n",
    "        单位推荐意见 = table.cell(6, 1).text.strip(),\n",
    "        领导意见 = table.cell(7, 1).text.strip(),\n",
    "    )"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 遍历文件合并到表格"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 125,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 列名\n",
    "columns = None\n",
    "# 数据内容\n",
    "datas = []"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 126,
   "metadata": {},
   "outputs": [],
   "source": [
    "for file in os.listdir(\"优秀教师选拔考试报名表\"):\n",
    "    if file.endswith(\".docx\"):\n",
    "        file_path = f\"优秀教师选拔考试报名表/{file}\"\n",
    "        \n",
    "        print(\"解析文件\", file_path)\n",
    "        \n",
    "        data = parse_docfile(file_path)\n",
    "        \n",
    "        if not columns:\n",
    "            columns = list(data.keys())\n",
    "            \n",
    "        datas.append([data[column] for column in columns])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 127,
   "metadata": {},
   "outputs": [],
   "source": [
    "df = pd.DataFrame(datas, columns = columns)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 128,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>姓名</th>\n",
       "      <th>性别</th>\n",
       "      <th>民族</th>\n",
       "      <th>出生年月</th>\n",
       "      <th>参加工作时间</th>\n",
       "      <th>学历</th>\n",
       "      <th>籍贯</th>\n",
       "      <th>政治面貌</th>\n",
       "      <th>毕业院校</th>\n",
       "      <th>专业</th>\n",
       "      <th>职务</th>\n",
       "      <th>工作单位</th>\n",
       "      <th>报考类别</th>\n",
       "      <th>工作简历</th>\n",
       "      <th>工作业绩</th>\n",
       "      <th>单位推荐意见</th>\n",
       "      <th>领导意见</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>张飞</td>\n",
       "      <td>男</td>\n",
       "      <td>汉</td>\n",
       "      <td>1959.11</td>\n",
       "      <td>1978.7</td>\n",
       "      <td>本科</td>\n",
       "      <td>成都</td>\n",
       "      <td>蜀汉</td>\n",
       "      <td>成都大学</td>\n",
       "      <td>体育</td>\n",
       "      <td>大将军</td>\n",
       "      <td>成都第一中学</td>\n",
       "      <td>体育</td>\n",
       "      <td>从简</td>\n",
       "      <td>优秀</td>\n",
       "      <td>同意</td>\n",
       "      <td>同意</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>周瑜</td>\n",
       "      <td>男</td>\n",
       "      <td>汉</td>\n",
       "      <td>1976.11</td>\n",
       "      <td>1997.7</td>\n",
       "      <td>研究生</td>\n",
       "      <td>建康</td>\n",
       "      <td>东吴</td>\n",
       "      <td>建康大学</td>\n",
       "      <td>语文</td>\n",
       "      <td>大将军</td>\n",
       "      <td>建康第一中学</td>\n",
       "      <td>美术</td>\n",
       "      <td>从简</td>\n",
       "      <td>优秀</td>\n",
       "      <td>同意</td>\n",
       "      <td>同意</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>曹操</td>\n",
       "      <td>男</td>\n",
       "      <td>汉</td>\n",
       "      <td>1953.11</td>\n",
       "      <td>1973.7</td>\n",
       "      <td>本科</td>\n",
       "      <td>许昌</td>\n",
       "      <td>曹魏</td>\n",
       "      <td>许昌大学</td>\n",
       "      <td>语文</td>\n",
       "      <td>大将军</td>\n",
       "      <td>许昌第一中学</td>\n",
       "      <td>语文</td>\n",
       "      <td>从简</td>\n",
       "      <td>优秀</td>\n",
       "      <td>同意</td>\n",
       "      <td>同意</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>小乔</td>\n",
       "      <td>女</td>\n",
       "      <td>汉</td>\n",
       "      <td>1978.3</td>\n",
       "      <td>1996.7</td>\n",
       "      <td>研究生</td>\n",
       "      <td>建康</td>\n",
       "      <td>东吴</td>\n",
       "      <td>建康大学</td>\n",
       "      <td>音乐</td>\n",
       "      <td>无</td>\n",
       "      <td>建康第一中学</td>\n",
       "      <td>音乐</td>\n",
       "      <td>从简</td>\n",
       "      <td>优秀</td>\n",
       "      <td>同意</td>\n",
       "      <td>同意</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>曹丕</td>\n",
       "      <td>男</td>\n",
       "      <td>汉</td>\n",
       "      <td>1973.11</td>\n",
       "      <td>1993.7</td>\n",
       "      <td>博士</td>\n",
       "      <td>许昌</td>\n",
       "      <td>曹魏</td>\n",
       "      <td>许昌大学</td>\n",
       "      <td>语文</td>\n",
       "      <td>太子</td>\n",
       "      <td>许昌第一中学</td>\n",
       "      <td>英语</td>\n",
       "      <td>从简</td>\n",
       "      <td>优秀</td>\n",
       "      <td>同意</td>\n",
       "      <td>同意</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>刘备</td>\n",
       "      <td>男</td>\n",
       "      <td>汉</td>\n",
       "      <td>1956.11</td>\n",
       "      <td>1975.7</td>\n",
       "      <td>本科</td>\n",
       "      <td>成都</td>\n",
       "      <td>蜀汉</td>\n",
       "      <td>成都大学</td>\n",
       "      <td>体育</td>\n",
       "      <td>大将军</td>\n",
       "      <td>成都第一中学</td>\n",
       "      <td>语文</td>\n",
       "      <td>从简</td>\n",
       "      <td>优秀</td>\n",
       "      <td>同意</td>\n",
       "      <td>同意</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   姓名 性别 民族     出生年月  参加工作时间   学历  籍贯 政治面貌  毕业院校  专业   职务    工作单位 报考类别 工作简历  \\\n",
       "0  张飞  男  汉  1959.11  1978.7   本科  成都   蜀汉  成都大学  体育  大将军  成都第一中学   体育   从简   \n",
       "1  周瑜  男  汉  1976.11  1997.7  研究生  建康   东吴  建康大学  语文  大将军  建康第一中学   美术   从简   \n",
       "2  曹操  男  汉  1953.11  1973.7   本科  许昌   曹魏  许昌大学  语文  大将军  许昌第一中学   语文   从简   \n",
       "3  小乔  女  汉   1978.3  1996.7  研究生  建康   东吴  建康大学  音乐    无  建康第一中学   音乐   从简   \n",
       "4  曹丕  男  汉  1973.11  1993.7   博士  许昌   曹魏  许昌大学  语文   太子  许昌第一中学   英语   从简   \n",
       "5  刘备  男  汉  1956.11  1975.7   本科  成都   蜀汉  成都大学  体育  大将军  成都第一中学   语文   从简   \n",
       "\n",
       "  工作业绩 单位推荐意见 领导意见  \n",
       "0   优秀     同意   同意  \n",
       "1   优秀     同意   同意  \n",
       "2   优秀     同意   同意  \n",
       "3   优秀     同意   同意  \n",
       "4   优秀     同意   同意  \n",
       "5   优秀     同意   同意  "
      ]
     },
     "execution_count": 128,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 输出到Excel"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 129,
   "metadata": {},
   "outputs": [],
   "source": [
    "df.to_excel(\"优秀教师选拔考试报名表.xlsx\", index=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
